{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "from tqdm import tqdm\n",
    "import lightgbm as lgb\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "from sklearn import metrics\n",
    "import warnings\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "pd.set_option('display.max_columns', 100)\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "def group_feature(df, key, target, aggs):   \n",
    "    agg_dict = {}\n",
    "    for ag in aggs:\n",
    "        agg_dict[f'{target}_{ag}'] = ag\n",
    "    print(agg_dict)\n",
    "    t = df.groupby(key)[target].agg(agg_dict).reset_index()\n",
    "    return t\n",
    "\n",
    "def extract_feature(df, train):\n",
    "    t = group_feature(df, 'ship','x',['max','min','mean','std','skew','sum'])\n",
    "    train = pd.merge(train, t, on='ship', how='left')\n",
    "    t = group_feature(df, 'ship','x',['count'])\n",
    "    train = pd.merge(train, t, on='ship', how='left')\n",
    "    t = group_feature(df, 'ship','y',['max','min','mean','std','skew','sum'])\n",
    "    train = pd.merge(train, t, on='ship', how='left')\n",
    "    t = group_feature(df, 'ship','v',['max','min','mean','std','skew','sum'])\n",
    "    train = pd.merge(train, t, on='ship', how='left')\n",
    "    t = group_feature(df, 'ship','d',['max','min','mean','std','skew','sum'])\n",
    "    train = pd.merge(train, t, on='ship', how='left')\n",
    "    train['x_max_x_min'] = train['x_max'] - train['x_min']\n",
    "    train['y_max_y_min'] = train['y_max'] - train['y_min']\n",
    "    train['y_max_x_min'] = train['y_max'] - train['x_min']\n",
    "    train['x_max_y_min'] = train['x_max'] - train['y_min']\n",
    "    train['slope'] = train['y_max_y_min'] / np.where(train['x_max_x_min']==0, 0.001, train['x_max_x_min'])\n",
    "    train['area'] = train['x_max_x_min'] * train['y_max_y_min']\n",
    "    \n",
    "    mode_hour = df.groupby('ship')['hour'].agg(lambda x:x.value_counts().index[0]).to_dict()\n",
    "    train['mode_hour'] = train['ship'].map(mode_hour)\n",
    "    \n",
    "    t = group_feature(df, 'ship','hour',['max','min'])\n",
    "    train = pd.merge(train, t, on='ship', how='left')\n",
    "    \n",
    "    hour_nunique = df.groupby('ship')['hour'].nunique().to_dict()\n",
    "    date_nunique = df.groupby('ship')['date'].nunique().to_dict()\n",
    "    train['hour_nunique'] = train['ship'].map(hour_nunique)\n",
    "    train['date_nunique'] = train['ship'].map(date_nunique)\n",
    "\n",
    "    t = df.groupby('ship')['time'].agg({'diff_time':lambda x:np.max(x)-np.min(x)}).reset_index()\n",
    "    t['diff_day'] = t['diff_time'].dt.days\n",
    "    t['diff_second'] = t['diff_time'].dt.seconds\n",
    "    train = pd.merge(train, t, on='ship', how='left')\n",
    "    return train\n",
    "\n",
    "def extract_dt(df):\n",
    "    df['time'] = pd.to_datetime(df['time'], format='%m%d %H:%M:%S')\n",
    "    # df['month'] = df['time'].dt.month\n",
    "    # df['day'] = df['time'].dt.day\n",
    "    df['date'] = df['time'].dt.date\n",
    "    df['hour'] = df['time'].dt.hour\n",
    "    # df = df.drop_duplicates(['ship','month'])\n",
    "    df['weekday'] = df['time'].dt.weekday\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_hdf('../input/train.h5')\n",
    "# train = df.drop_duplicates(['ship','type'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = pd.read_hdf('../input/test.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = extract_dt(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = extract_dt(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_label = train.drop_duplicates('ship')\n",
    "test_label = test.drop_duplicates('ship')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "拖网    0.623000\n",
       "围网    0.231571\n",
       "刺网    0.145429\n",
       "Name: type, dtype: float64"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_label['type'].value_counts(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "type_map = dict(zip(train_label['type'].unique(), np.arange(3)))\n",
    "type_map_rev = {v:k for k,v in type_map.items()}\n",
    "train_label['type'] = train_label['type'].map(type_map)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'x_max': 'max', 'x_min': 'min', 'x_mean': 'mean', 'x_std': 'std', 'x_sum': 'sum'}\n",
      "{'x_count': 'count'}\n",
      "{'y_max': 'max', 'y_min': 'min', 'y_mean': 'mean', 'y_std': 'std', 'y_sum': 'sum'}\n",
      "{'v_max': 'max', 'v_min': 'min', 'v_mean': 'mean', 'v_std': 'std', 'v_sum': 'sum'}\n",
      "{'d_max': 'max', 'd_min': 'min', 'd_mean': 'mean', 'd_std': 'std', 'd_sum': 'sum'}\n",
      "{'hour_max': 'max', 'hour_min': 'min'}\n"
     ]
    }
   ],
   "source": [
    "train_label = extract_feature(train, train_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'x_max': 'max', 'x_min': 'min', 'x_mean': 'mean', 'x_std': 'std', 'x_sum': 'sum'}\n",
      "{'x_count': 'count'}\n",
      "{'y_max': 'max', 'y_min': 'min', 'y_mean': 'mean', 'y_std': 'std', 'y_sum': 'sum'}\n",
      "{'v_max': 'max', 'v_min': 'min', 'v_mean': 'mean', 'v_std': 'std', 'v_sum': 'sum'}\n",
      "{'d_max': 'max', 'd_min': 'min', 'd_mean': 'mean', 'd_std': 'std', 'd_sum': 'sum'}\n",
      "{'hour_max': 'max', 'hour_min': 'min'}\n"
     ]
    }
   ],
   "source": [
    "test_label = extract_feature(test, test_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "features = [x for x in train_label.columns if x not in ['ship','type','time','diff_time','date']]\n",
    "target = 'type'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40 x,y,v,d,hour,weekday,x_max,x_min,x_mean,x_std,x_sum,x_count,y_max,y_min,y_mean,y_std,y_sum,v_max,v_min,v_mean,v_std,v_sum,d_max,d_min,d_mean,d_std,d_sum,x_max_x_min,y_max_y_min,y_max_x_min,x_max_y_min,slope,area,mode_hour,hour_max,hour_min,hour_nunique,date_nunique,diff_day,diff_second\n"
     ]
    }
   ],
   "source": [
    "print(len(features), ','.join(features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "params = {\n",
    "    'n_estimators': 5000,\n",
    "    'boosting_type': 'gbdt',\n",
    "    'objective': 'multiclass',\n",
    "    'num_class': 3,\n",
    "    'early_stopping_rounds': 100,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\ttraining's multi_logloss: 0.0828667\tvalid_1's multi_logloss: 0.269078\n",
      "[200]\ttraining's multi_logloss: 0.022058\tvalid_1's multi_logloss: 0.264574\n",
      "Early stopping, best iteration is:\n",
      "[180]\ttraining's multi_logloss: 0.0284972\tvalid_1's multi_logloss: 0.262031\n",
      "0 val f1 0.8744567161504285\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\ttraining's multi_logloss: 0.085238\tvalid_1's multi_logloss: 0.274897\n",
      "[200]\ttraining's multi_logloss: 0.0222402\tvalid_1's multi_logloss: 0.272668\n",
      "Early stopping, best iteration is:\n",
      "[153]\ttraining's multi_logloss: 0.0416896\tvalid_1's multi_logloss: 0.268232\n",
      "1 val f1 0.8570390224496975\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\ttraining's multi_logloss: 0.0839062\tvalid_1's multi_logloss: 0.266458\n",
      "[200]\ttraining's multi_logloss: 0.0228758\tvalid_1's multi_logloss: 0.25578\n",
      "Early stopping, best iteration is:\n",
      "[164]\ttraining's multi_logloss: 0.0363628\tvalid_1's multi_logloss: 0.254512\n",
      "2 val f1 0.8808118299909231\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\ttraining's multi_logloss: 0.0845035\tvalid_1's multi_logloss: 0.272673\n",
      "[200]\ttraining's multi_logloss: 0.0225549\tvalid_1's multi_logloss: 0.277392\n",
      "Early stopping, best iteration is:\n",
      "[108]\ttraining's multi_logloss: 0.0758342\tvalid_1's multi_logloss: 0.270036\n",
      "3 val f1 0.8629486588985998\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\ttraining's multi_logloss: 0.0815182\tvalid_1's multi_logloss: 0.296271\n",
      "[200]\ttraining's multi_logloss: 0.0211976\tvalid_1's multi_logloss: 0.295628\n",
      "Early stopping, best iteration is:\n",
      "[160]\ttraining's multi_logloss: 0.0357663\tvalid_1's multi_logloss: 0.290207\n",
      "4 val f1 0.8549111545740181\n"
     ]
    }
   ],
   "source": [
    "fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n",
    "\n",
    "X = train_label[features].copy()\n",
    "y = train_label[target]\n",
    "models = []\n",
    "pred = np.zeros((len(test_label),3))\n",
    "oof = np.zeros((len(X), 3))\n",
    "for index, (train_idx, val_idx) in enumerate(fold.split(X, y)):\n",
    "\n",
    "    train_set = lgb.Dataset(X.iloc[train_idx], y.iloc[train_idx])\n",
    "    val_set = lgb.Dataset(X.iloc[val_idx], y.iloc[val_idx])\n",
    "\n",
    "    model = lgb.train(params, train_set, valid_sets=[train_set, val_set], verbose_eval=100)\n",
    "    models.append(model)\n",
    "    val_pred = model.predict(X.iloc[val_idx])\n",
    "    oof[val_idx] = val_pred\n",
    "    val_y = y.iloc[val_idx]\n",
    "    val_pred = np.argmax(val_pred, axis=1)\n",
    "    print(index, 'val f1', metrics.f1_score(val_y, val_pred, average='macro'))\n",
    "    # 0.8695539641133697\n",
    "    # 0.8866211724839532\n",
    "\n",
    "    test_pred = model.predict(test_label[features])\n",
    "    pred += test_pred/5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "oof f1 0.8660762740409558\n"
     ]
    }
   ],
   "source": [
    "oof = np.argmax(oof, axis=1)\n",
    "print('oof f1', metrics.f1_score(oof, y, average='macro'))\n",
    "# 0.8701544575329372"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1    0.6325\n",
      "0    0.2390\n",
      "2    0.1285\n",
      "Name: pred, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "pred = np.argmax(pred, axis=1)\n",
    "sub = test_label[['ship']]\n",
    "sub['pred'] = pred\n",
    "\n",
    "print(sub['pred'].value_counts(1))\n",
    "sub['pred'] = sub['pred'].map(type_map_rev)\n",
    "sub.to_csv('result.csv', index=None, header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "ret = []\n",
    "for index, model in enumerate(models):\n",
    "    df = pd.DataFrame()\n",
    "    df['name'] = model.feature_name()\n",
    "    df['score'] = model.feature_importance()\n",
    "    df['fold'] = index\n",
    "    ret.append(df)\n",
    "    \n",
    "df = pd.concat(ret)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.groupby('name', as_index=False)['score'].mean()\n",
    "df = df.sort_values(['score'], ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>y_max_x_min</td>\n",
       "      <td>676.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>y_max</td>\n",
       "      <td>624.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>x_min</td>\n",
       "      <td>611.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>x_max_y_min</td>\n",
       "      <td>568.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>v_std</td>\n",
       "      <td>535.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>y</td>\n",
       "      <td>512.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>x</td>\n",
       "      <td>458.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>v_skew</td>\n",
       "      <td>445.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>y_min</td>\n",
       "      <td>422.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>x_skew</td>\n",
       "      <td>419.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>y_skew</td>\n",
       "      <td>416.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>slope</td>\n",
       "      <td>398.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>x_max</td>\n",
       "      <td>373.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>diff_second</td>\n",
       "      <td>368.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>x_std</td>\n",
       "      <td>343.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>d_mean</td>\n",
       "      <td>342.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>v</td>\n",
       "      <td>341.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>v_max</td>\n",
       "      <td>338.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>v_mean</td>\n",
       "      <td>331.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>d_std</td>\n",
       "      <td>331.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>y_mean</td>\n",
       "      <td>320.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>x_mean</td>\n",
       "      <td>319.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>y_std</td>\n",
       "      <td>285.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>v_sum</td>\n",
       "      <td>271.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>x_count</td>\n",
       "      <td>265.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>d_sum</td>\n",
       "      <td>262.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>x_max_x_min</td>\n",
       "      <td>258.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>d</td>\n",
       "      <td>252.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>x_sum</td>\n",
       "      <td>241.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>d_skew</td>\n",
       "      <td>239.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>y_max_y_min</td>\n",
       "      <td>233.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>area</td>\n",
       "      <td>225.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>y_sum</td>\n",
       "      <td>204.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>mode_hour</td>\n",
       "      <td>177.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>d_max</td>\n",
       "      <td>155.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>v_min</td>\n",
       "      <td>61.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>hour</td>\n",
       "      <td>26.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>date_nunique</td>\n",
       "      <td>25.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>weekday</td>\n",
       "      <td>23.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>diff_day</td>\n",
       "      <td>20.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>d_min</td>\n",
       "      <td>15.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>hour_nunique</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>hour_min</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>hour_max</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            name  score\n",
       "37   y_max_x_min  676.4\n",
       "36         y_max  624.6\n",
       "31         x_min  611.4\n",
       "29   x_max_y_min  568.0\n",
       "22         v_std  535.6\n",
       "35             y  512.0\n",
       "25             x  458.8\n",
       "21        v_skew  445.4\n",
       "40         y_min  422.6\n",
       "32        x_skew  419.2\n",
       "41        y_skew  416.4\n",
       "16         slope  398.2\n",
       "27         x_max  373.4\n",
       "10   diff_second  368.6\n",
       "33         x_std  343.8\n",
       "3         d_mean  342.2\n",
       "17             v  341.2\n",
       "18         v_max  338.6\n",
       "19        v_mean  331.8\n",
       "6          d_std  331.4\n",
       "39        y_mean  320.4\n",
       "30        x_mean  319.0\n",
       "42         y_std  285.4\n",
       "23         v_sum  271.4\n",
       "26       x_count  265.0\n",
       "7          d_sum  262.0\n",
       "28   x_max_x_min  258.4\n",
       "1              d  252.2\n",
       "34         x_sum  241.8\n",
       "5         d_skew  239.4\n",
       "38   y_max_y_min  233.2\n",
       "0           area  225.6\n",
       "43         y_sum  204.4\n",
       "15     mode_hour  177.8\n",
       "2          d_max  155.6\n",
       "20         v_min   61.0\n",
       "11          hour   26.0\n",
       "8   date_nunique   25.6\n",
       "24       weekday   23.2\n",
       "9       diff_day   20.4\n",
       "4          d_min   15.2\n",
       "14  hour_nunique    1.4\n",
       "13      hour_min    0.0\n",
       "12      hour_max    0.0"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
